The first set of data is from phantom camera A. First, we will perform exploratory data analysis:

Load libraries and read in the data. Reformat as necessary

#Reformat data
tidy_data <- data[,c(1,2,3,6:13)]
colnames(tidy_data)= c("Tools", "Camera","Frame",  "Q0", "Qx","Qy", "Qz",  "Tx",  "Ty", "Tz" ,"Error")
glimpse(tidy_data)
## Rows: 208
## Columns: 11
## $ Tools  <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
## $ Camera <chr> "Port 1: NuVasive NavilinkLeft  s/n:3C444000", "Port 1: NuVasiv…
## $ Frame  <int> 3992479, 3992482, 3992485, 3992488, 3992491, 3992494, 3992497, …
## $ Q0     <dbl> 0.0405939, 0.0406833, 0.0405931, 0.0407776, 0.0408771, 0.040782…
## $ Qx     <dbl> -0.7374998, -0.7374906, -0.7375060, -0.7375042, -0.7375144, -0.…
## $ Qy     <dbl> -0.6721107, -0.6721030, -0.6721018, -0.6720792, -0.6720633, -0.…
## $ Qz     <dbl> 0.0520900, 0.0522498, 0.0521179, 0.0522908, 0.0522733, 0.052242…
## $ Tx     <dbl> -288.466, -288.474, -288.496, -288.522, -288.544, -288.541, -28…
## $ Ty     <dbl> -412.326, -412.330, -412.297, -412.252, -412.223, -412.221, -41…
## $ Tz     <dbl> -2254.760, -2254.786, -2254.812, -2254.777, -2254.766, -2254.73…
## $ Error  <dbl> 0.1415059, 0.1299241, 0.1256736, 0.1242455, 0.1416914, 0.162591…

Data Size:

## [1] 208  11

Summary Statistics of each column:

#summary of Q0
summary(tidy_data$Q0)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04042 0.04064 0.04073 0.04072 0.04081 0.04108
sd(tidy_data$Q0) #standard deviation
## [1] 0.0001200495

Look at distribution of Q0

p1 <- ggplot(data = tidy_data, aes(x = Q0)) + geom_histogram()
p2 <- ggplot(data = tidy_data, aes(x = Q0)) + geom_boxplot()
grid.arrange(p1,p2, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Summary statistics of Qx
summary(tidy_data$Qx)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.7375 -0.7375 -0.7375 -0.7375 -0.7375 -0.7375
sd(tidy_data$Qx) #standard deviation
## [1] 7.80372e-06
#Distribution of Qx
p3 <- ggplot(data = tidy_data, aes(x = Qx)) + geom_histogram()
p4 <- ggplot(data = tidy_data, aes(x = Qx)) + geom_boxplot()
grid.arrange(p3,p4, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#summary statistics of Qy
summary(tidy_data$Qy)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.6721 -0.6721 -0.6721 -0.6721 -0.6721 -0.6721
sd(tidy_data$Qy) #standard deviation
## [1] 1.325869e-05
#distribution of Qy
p5 <- ggplot(data = tidy_data, aes(x = Qy)) + geom_histogram()
p6 <- ggplot(data = tidy_data, aes(x = Qy)) + geom_boxplot()
grid.arrange(p5,p6, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Summary statistics of Qz
summary(tidy_data$Qz)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.05209 0.05225 0.05232 0.05233 0.05242 0.05262
sd(tidy_data$Qz) #standard deviation
## [1] 0.000119138
#Distribution of Qz
p7 <- ggplot(data = tidy_data, aes(x = Qz)) + geom_histogram()
p8 <- ggplot(data = tidy_data, aes(x = Qz)) + geom_boxplot()
grid.arrange(p7,p8, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Look at 3D distribution
fig <- plot_ly(tidy_data, x = ~Qx, y = ~Qy, z = ~Qz,color= ~Q0)
fig
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

We can see that Qx, Qy and Qz form a plane. Next we look at Tx, Ty and Tz.

#Summary statistics of Tx
summary(tidy_data$Tx)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -288.6  -288.5  -288.5  -288.5  -288.5  -288.5
sd(tidy_data$Tx) #Standard deviation
## [1] 0.0149809
#Distribution 
p9 <- ggplot(data = tidy_data, aes(x = Tx)) + geom_histogram()
p10 <- ggplot(data = tidy_data, aes(x = Tx)) + geom_boxplot()
grid.arrange(p9,p10, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Summary statistics of Tx
summary(tidy_data$Ty)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -412.3  -412.3  -412.3  -412.3  -412.3  -412.2
sd(tidy_data$Ty)#Standard deviation
## [1] 0.01752407
#Distribution 
p11 <- ggplot(data = tidy_data, aes(x = Ty)) + geom_histogram()
p12 <- ggplot(data = tidy_data, aes(x = Ty)) + geom_boxplot()
grid.arrange(p11,p12, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Summary statistics of Tx
summary(tidy_data$Tz)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -2255   -2255   -2255   -2255   -2255   -2255
sd(tidy_data$Tz)#Standard deviation
## [1] 0.0255101
#Distribution 
p13 <- ggplot(data = tidy_data, aes(x = Tz)) + geom_histogram()
p14 <- ggplot(data = tidy_data, aes(x = Tz)) + geom_boxplot()
grid.arrange(p13,p14, ncol= 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3D distribution of Tx, Ty and Tz

fig1 <- plot_ly(tidy_data, x = ~Tx, y = ~Ty, z = ~Tz)
fig1
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Again, we notice that the three components form a plane. Now we will try to determine the probability distribution of each component

#Load necessary libraries
#install.packages("fitdistrplus")
library(fitdistrplus)
#install.packages("logspline")
library(logspline)

Determine probability distribution of Q0

#Determine distribution of Q0
plotdist(tidy_data$Q0, histo = TRUE, demp = TRUE)

We use the Cullen and Frey graph to see what type of distribution it may be since the summary statistics did not reveal this info.

descdist(tidy_data$Q0, discrete = FALSE,boot = 1000)

## summary statistics
## ------
## min:  0.0404157   max:  0.0410776 
## median:  0.0407263 
## mean:  0.04072497 
## estimated sd:  0.0001200495 
## estimated skewness:  -0.003252883 
## estimated kurtosis:  2.692386

Q0 seems to be either normal, lognormal or gamma

#Shapiro wilk test for normality
shapiro.test(tidy_data$Q0)
## 
##  Shapiro-Wilk normality test
## 
## data:  tidy_data$Q0
## W = 0.99586, p-value = 0.8493

The p-value of the first test is not less than .05, which indicates that the data is not normally distributed.

Testing for lognormal and gamma distribution as the observation is close to them in the Cullen and Frey graph.

fg <- fitdist(tidy_data$Q0, "gamma")
fl <- fitdist(tidy_data$Q0, "lnorm")

par(mfrow = c(2, 2))
plot.legend <- c("gamma","lognormal")
denscomp(list(fg,fl), legendtext = plot.legend)
qqcomp(list(fg,fl), legendtext = plot.legend)
cdfcomp(list(fg,fl), legendtext = plot.legend)
ppcomp(list(fg,fl), legendtext = plot.legend)

Since the gamma or log-logistic distribution has the min AIC, BIC, and minimum goodness-of-fit statistics, we will choose the gamma or log-logistic distribution.

gofstat(list(fg,fl))
## Goodness-of-fit statistics
##                              1-mle-gamma 2-mle-lnorm
## Kolmogorov-Smirnov statistic  0.03210860  0.03197964
## Cramer-von Mises statistic    0.04562536  0.04549242
## Anderson-Darling statistic    0.27091176  0.27061446
## 
## Goodness-of-fit criteria
##                                1-mle-gamma 2-mle-lnorm
## Akaike's Information Criterion   -3162.206   -3162.204
## Bayesian Information Criterion   -3155.531   -3155.529
fg$estimate
##     shape      rate 
##  115636.2 2839441.2

From the AIC, BIC values above, this could be either gamma or lognormal distribution.

Now checking the translational parameters. Since there are negative values, transforming the variable to their absolute values

tidy_data[,4:10] <- abs(tidy_data[,4:10])

Checking the fit and distribution of Tx

descdist(tidy_data$Tx, discrete = FALSE, boot = 1000)

## summary statistics
## ------
## min:  288.457   max:  288.551 
## median:  288.505 
## mean:  288.5057 
## estimated sd:  0.0149809 
## estimated skewness:  -0.1572988 
## estimated kurtosis:  4.575909

From the graph above, the distribution looks close to logistic Logistic :

f1 <- fitdist(tidy_data$Tx, "logis")
f2 <- fitdist(tidy_data$Tx, "cauchy")

par(mfrow = c(2, 2))
plot.legend <- c("logistic","cauchy")
denscomp(list(f1, f2), legendtext = plot.legend)
qqcomp(list(f1, f2), legendtext = plot.legend)
cdfcomp(list(f1, f2), legendtext = plot.legend)
ppcomp(list(f1, f2), legendtext = plot.legend)

The distribution for tx seems to be logistic

gofstat(list(f1,f2))
## Goodness-of-fit statistics
##                              1-mle-logis 2-mle-cauchy
## Kolmogorov-Smirnov statistic   0.1068135   0.04679342
## Cramer-von Mises statistic     0.5231311   0.09293499
## Anderson-Darling statistic     2.7800304   0.98972851
## 
## Goodness-of-fit criteria
##                                1-mle-logis 2-mle-cauchy
## Akaike's Information Criterion   -1172.552    -1174.616
## Bayesian Information Criterion   -1165.877    -1167.941
f2$estimate
##    location       scale 
## 2.88505e+02 5.69618e-03

Tx seems to be cauchy distribution.

Checking the fit and distribution of ty:

hist(tidy_data$Ty)

descdist(tidy_data$Ty, discrete = FALSE, boot = 1000)

## summary statistics
## ------
## min:  412.221   max:  412.33 
## median:  412.272 
## mean:  412.2716 
## estimated sd:  0.01752407 
## estimated skewness:  0.01873236 
## estimated kurtosis:  4.623855

Logistic or cauchy seem to be probable fits:

d1 <- fitdist(tidy_data$Ty, "logis")
d2 <- fitdist(tidy_data$Ty, "cauchy")

par(mfrow = c(2, 2))
plot.legend <- c("logis","cauchy")
denscomp(list(d1,d2), legendtext = plot.legend)
qqcomp(list(d1,d2), legendtext = plot.legend)
cdfcomp(list(d1,d2), legendtext = plot.legend)
ppcomp(list(d1,d2), legendtext = plot.legend)

Verifying goodness of fit for the cauchy distribution.

#install.packages("EnvStats")
library(EnvStats)
gofstat(d2)
## Goodness-of-fit statistics
##                              1-mle-cauchy
## Kolmogorov-Smirnov statistic   0.07065756
## Cramer-von Mises statistic     0.09573309
## Anderson-Darling statistic     1.00023559
## 
## Goodness-of-fit criteria
##                                1-mle-cauchy
## Akaike's Information Criterion    -1101.388
## Bayesian Information Criterion    -1094.713
#gofTest(tidy_data$ty1, test = "ks", distribution = "cauchy")

The distribution of Ty is cauchy

Checking the fit and distribution of Tz:

hist(tidy_data$Tz)

descdist(tidy_data$Tz, discrete = FALSE, boot = 1000)

## summary statistics
## ------
## min:  2254.678   max:  2254.817 
## median:  2254.749 
## mean:  2254.751 
## estimated sd:  0.0255101 
## estimated skewness:  0.1529465 
## estimated kurtosis:  3.059276

From above, distribution could be normal or gamma

d5 <- fitdist(tidy_data$Tz, "norm")
d6 <- fitdist(tidy_data$Tz, "gamma")
 

par(mfrow = c(2, 2))
plot.legend <- c("normal","gamma")
denscomp(list(d5,d6), legendtext = plot.legend)
qqcomp(list(d5,d6), legendtext = plot.legend)
cdfcomp(list(d5,d6), legendtext = plot.legend)
ppcomp(list(d5,d6), legendtext = plot.legend)

Verifying the goodness of fit for normal distribution

gofstat(list(d5, d6))
## Goodness-of-fit statistics
##                              1-mle-norm 2-mle-gamma
## Kolmogorov-Smirnov statistic 0.06826986   0.0682687
## Cramer-von Mises statistic   0.12696064   0.1269550
## Anderson-Darling statistic   0.69866602   0.6986338
## 
## Goodness-of-fit criteria
##                                1-mle-norm 2-mle-gamma
## Akaike's Information Criterion  -932.8951   -932.8954
## Bayesian Information Criterion  -926.2201   -926.2203
shapiro.test(tidy_data$Tz)
## 
##  Shapiro-Wilk normality test
## 
## data:  tidy_data$Tz
## W = 0.99149, p-value = 0.2649

Tz could be gamma distribution or normal distribution if we try to transform it.

Testing for normality on all before we try transformations.

shapiro.test(tidy_data$Tx)
## 
##  Shapiro-Wilk normality test
## 
## data:  tidy_data$Tx
## W = 0.94987, p-value = 1.194e-06
shapiro.test(tidy_data$Ty)
## 
##  Shapiro-Wilk normality test
## 
## data:  tidy_data$Ty
## W = 0.95179, p-value = 1.851e-06
shapiro.test(tidy_data$Tz)
## 
##  Shapiro-Wilk normality test
## 
## data:  tidy_data$Tz
## W = 0.99149, p-value = 0.2649

Tx and Ty could be normal and Tz could be gamma(from above) Caution: shapiro wilk can be sensitive to large sample sizes so its optimal to decide based on qq plots.

Transforming Tz:

Checking the skewness

#install.packages("moments")
library(moments)

skewness(tidy_data$Tz)
## [1] 0.1518413

Tz is positively skewed.

Trying transformation below: sqrt(x) for positively skewed data,

tidy_data$Tz_t <- (tidy_data$Tz)^0.5
shapiro.test((tidy_data$Tz_t))
## 
##  Shapiro-Wilk normality test
## 
## data:  (tidy_data$Tz_t)
## W = 0.99149, p-value = 0.2649
qqnorm(tidy_data$Tz_t)

p value is less than 0.05 so this is unsuccessful.Checking another transformation

1/x for positively skewed data

tidy_data$Tz_t1 <- 1/(tidy_data$Tz)
shapiro.test((tidy_data$Tz_t1))
## 
##  Shapiro-Wilk normality test
## 
## data:  (tidy_data$Tz_t1)
## W = 0.99149, p-value = 0.265
qqnorm(tidy_data$Tz_t1)

p value is less than 0.05 so this is unsuccessful.Checking log transformation:

log10(x) for positively skewed data,

tidy_data$Tz_t2 <- log10(tidy_data$Tz)
shapiro.test((tidy_data$Tz_t2))
## 
##  Shapiro-Wilk normality test
## 
## data:  (tidy_data$Tz_t2)
## W = 0.99149, p-value = 0.265
qqnorm(tidy_data$Tz_t2)

p value is less than 0.05 so this is unsuccessful.Checking tukey transformation on tx

#install.packages("rcompanion")
library(rcompanion)
## Warning: package 'rcompanion' was built under R version 4.1.2
transformTukey(tidy_data$Tz, returnLambda = T)

## 
##   lambda      W Shapiro.p.value
## 1    -10 0.9915          0.2655
## 
## if (lambda >  0){TRANS = x ^ lambda} 
## if (lambda == 0){TRANS = log(x)} 
## if (lambda <  0){TRANS = -1 * x ^ lambda}

## lambda 
##    -10

The transformation with the provided lambda unsuccessful.

To get better normal distribution, using the bestNormalize package that tries various transformations: arcsinh(x) Box-Cox Center+scale Exp(x) Log_b(x+a) orderNorm (ORQ) sqrt(x + a) Yeo-Johnson

#install.packages("bestNormalize")
library(bestNormalize)
## Registered S3 method overwritten by 'bestNormalize':
##   method      from    
##   plot.boxcox EnvStats
## 
## Attaching package: 'bestNormalize'
## The following object is masked from 'package:EnvStats':
## 
##     boxcox
## The following object is masked from 'package:MASS':
## 
##     boxcox
(BNobject <- bestNormalize::bestNormalize(tidy_data$Tz))
## Best Normalizing transformation with 208 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.1542
##  - Box-Cox: 1.1542
##  - Center+scale: 1.1542
##  - Log_b(x+a): 1.1542
##  - orderNorm (ORQ): 1.0425
##  - sqrt(x + a): 1.1542
##  - Yeo-Johnson: 1.1542
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 208 nonmissing obs and ties
##  - 89 unique values 
##  - Original quantiles:
##       0%      25%      50%      75%     100% 
## 2254.678 2254.735 2254.749 2254.764 2254.817
orderNorm_obj <- bestNormalize::orderNorm(tidy_data$Tz)
## Warning in bestNormalize::orderNorm(tidy_data$Tz): Ties in data, Normal distribution not guaranteed
MASS::truehist(orderNorm_obj$x.t, main = "OrderNorm transformation", nbins = 12)

Note that orderNorm throws a warning due to the ties in tz vector. We can see that the estimated normality statistic for the OrderNorm transformation is close to one, however, so we know it is performing quite well. It is also performing better than all of the other transformations.

Now we verify our data using covariance matrix:

mx <- orderNorm_obj$x.t
df <- data.frame(tidy_data$Tx,tidy_data$Ty,mx)
cov(df)
##               tidy_data.Tx  tidy_data.Ty           mx
## tidy_data.Tx  0.0002244273 -0.0002267902 0.0057908630
## tidy_data.Ty -0.0002267902  0.0003070929 0.0009585919
## mx            0.0057908630  0.0009585919 0.9973927526

We see that Tx and Ty are not quite right because they dont have covariance with themselves.So we transform them to ensure normality.

(BNobject1 <- bestNormalize::bestNormalize(tidy_data$Ty))
## Best Normalizing transformation with 208 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 1.6943
##  - Box-Cox: 1.6943
##  - Center+scale: 1.6943
##  - Exp(x): 5.2
##  - Log_b(x+a): 1.6943
##  - orderNorm (ORQ): 1.1672
##  - sqrt(x + a): 1.6943
##  - Yeo-Johnson: 2.418
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 208 nonmissing obs and ties
##  - 70 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
## 412.221 412.264 412.272 412.279 412.330
orderNorm_obj1 <- bestNormalize::orderNorm(tidy_data$Ty)
## Warning in bestNormalize::orderNorm(tidy_data$Ty): Ties in data, Normal distribution not guaranteed
MASS::truehist(orderNorm_obj1$x.t, main = "OrderNorm transformation", nbins = 12)

(BNobject2 <- bestNormalize::bestNormalize(tidy_data$Tx))
## Best Normalizing transformation with 208 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - arcsinh(x): 2.1125
##  - Box-Cox: 2.1125
##  - Center+scale: 2.1125
##  - Exp(x): 2.1528
##  - Log_b(x+a): 2.1125
##  - orderNorm (ORQ): 1.2023
##  - sqrt(x + a): 2.1125
##  - Yeo-Johnson: 2.1125
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 208 nonmissing obs and ties
##  - 62 unique values 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
## 288.457 288.500 288.505 288.511 288.551
orderNorm_obj2 <- bestNormalize::orderNorm(tidy_data$Tx)
## Warning in bestNormalize::orderNorm(tidy_data$Tx): Ties in data, Normal distribution not guaranteed
MASS::truehist(orderNorm_obj2$x.t, main = "OrderNorm transformation", nbins = 12)

mx <- orderNorm_obj2$x.t
ny <- orderNorm_obj1$x.t
oz <- orderNorm_obj$x.t
df1 <- data.frame(mx,ny,oz)
cov(df1)
##            mx          ny         oz
## mx  0.9966647 -0.81074742 0.38829964
## ny -0.8107474  0.99771444 0.08991525
## oz  0.3882996  0.08991525 0.99739275

Now we look at the multivariate distribution od tx, ty and tz(transformed)

The Multivariate Normal distribution is a Normal distribution WITH a variance-covariance matrix to describe the relationship between a set of variables. The underlying assumption is that each variable follows a Normal distribution & that any two-combinations of variables ALSO follow a Normal distribution. The MVN package provides some nice tools to test if your dataset truly contains data following a Multivariate Normal distribution. Histograms, QQ, Box, and Scatter plots can be requested. There are also multiple tests available to see if variables are univariate or multivariate normal.

Drawing the relationships between all the variables in the dataset

#install.packages("MVN")
library(MVN)
MVN::mvn(df1,mvnTest = "royston",multivariatePlot = "qqplot")  
## $multivariateNormality
##      Test            H p value MVN
## 1 Royston 1.590654e-08       1 YES
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694
mvn(df1,multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.499022 0.0001036896  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694

Drawing all the relationships:

mvn(df1,univariatePlot = "histogram")
## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.499022 0.0001036896  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694
mvn(df1,multivariatePlot = "qq")

## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.499022 0.0001036896  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694
mvn(df1,univariatePlot = "box")
## Warning in uniPlot(data, type = univariatePlot): Box-Plots are based on
## standardized values (centered and scaled).
## $multivariateNormality
##            Test       HZ      p value MVN
## 1 Henze-Zirkler 1.499022 0.0001036896  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694
mvn(df1, mvnTest = "energy", univariatePlot = "scatter")

## $multivariateNormality
##          Test Statistic p value MVN
## 1 E-statistic  1.617446       0  NO
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694
mvn(df1, mvnTest = "royston", multivariatePlot = "qqplot", multivariateOutlierMethod="quan")
## $multivariateNormality
##      Test            H p value MVN
## 1 Royston 1.590654e-08       1 YES
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    mx        0.1232    0.9871    YES   
## 2 Anderson-Darling    ny        0.0836    0.9985    YES   
## 3 Anderson-Darling    oz        0.0386    0.9999    YES   
## 
## $Descriptives
##      n          Mean   Std.Dev      Median       Min      Max       25th
## mx 208 -7.293258e-05 0.9983309 -0.03616117 -2.819644 2.819644 -0.6297684
## ny 208 -2.640390e-05 0.9988566 -0.03013230 -2.819644 2.819644 -0.6896972
## oz 208  2.694238e-05 0.9986955 -0.02410453 -2.819644 2.819644 -0.6820738
##         75th          Skew   Kurtosis
## mx 0.6519660 -0.0019173868 -0.1246574
## ny 0.6820738 -0.0001377047 -0.1208774
## oz 0.6594367  0.0012034619 -0.1285694

ggplot(df1, aes(x=df1[,1], y=df1[,2]))+
  geom_point(alpha = .2) +
  geom_density_2d()+
  theme_bw()

ggplot(df1, aes(x=df1[,1], y=df1[,2]))+
  geom_point()+
  stat_ellipse(type = "norm", linetype = 2) +
  geom_smooth()+
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Looking at correlation matrix

#install.packages("corrplot")
library(corrplot)
## corrplot 0.92 loaded
corrplot(cor(df1), 
         method="ellipse",
         tl.pos="n",
         title="Matrix Correlations")

Continuing the journey with Qx, Qy and Qz:

data1 <- read.csv("/Users/disha/Downloads/april11_multiCamera_camD_orb45_002.csv")
data2 <- read.csv("/Users/disha/Downloads/april11_multiCamera_camC_orb45_002.csv")
data3 <- read.csv("/Users/disha/Downloads/april11_multiCamera_camB_orb45_002.csv")
data4 <- read.csv("/Users/disha/Downloads/april11_multiCamera_camA_orb45_002.csv")
all_data <- rbind(data1,data2,data3,data4)
tidy_all_data <- all_data[,c(1,2,3,6:13)]
colnames(tidy_all_data)= c("Tools", "Camera","Frame",  "Q0", "Qx","Qy", "Qz",  "Tx",  "Ty", "Tz" ,"Error")
glimpse(tidy_all_data)
## Rows: 8,363
## Columns: 11
## $ Tools  <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ Camera <chr> "Port 1: NuVasive NavilinkPhantom  s/n:3C4CAC00", "Port 1: NuVa…
## $ Frame  <int> 19974463, 19974466, 19974469, 19974472, 19974475, 19974478, 199…
## $ Q0     <dbl> 0.6942387, 0.6942451, 0.6942489, 0.6942447, 0.6942363, 0.694237…
## $ Qx     <dbl> -0.0892999, -0.0893661, -0.0893937, -0.0894415, -0.0895080, -0.…
## $ Qy     <dbl> 0.0806610, 0.0806984, 0.0807326, 0.0807267, 0.0806936, 0.080587…
## $ Qz     <dbl> -0.7096140, -0.7095951, -0.7095840, -0.7095827, -0.7095864, -0.…
## $ Tx     <dbl> 404.600, 404.556, 404.524, 404.498, 404.473, 404.443, 404.395, …
## $ Ty     <dbl> 291.313, 291.258, 291.172, 291.060, 290.947, 290.848, 290.760, …
## $ Tz     <dbl> -2170.929, -2170.912, -2170.897, -2170.858, -2170.847, -2170.83…
## $ Error  <dbl> 0.1838678, 0.1916115, 0.1948775, 0.1940424, 0.2057476, 0.204443…
fig1 <- plot_ly(tidy_all_data, x = ~Qx, y = ~Qy, z = ~Qz,color= ~Q0)
fig1
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode